* ---- Session settings ----
* Write logs as plain text rather than SMCL.
set logtype text
* Never stop for a --more-- prompt while the script runs.
set more off
* Turn every later -pause- command into a no-op.
pause off
* Memory request; NOTE(review): newer Stata releases manage memory themselves and ignore this - confirm target version.
set mem 500M

* Graph scheme; -permanently- keeps the setting for future sessions as well.
set scheme s1color, permanently

*************** DESCRIPTION ***********************
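* Consolidates the intermediate input datasets into two main
* datasets for analysis. Both are saved under 3.Final_data with
* the current segment (global currseg) appended to the file name:
*
* 	- main_dataset_firm_<currseg>: firm level
* 	- main_dataset_ind_<currseg>: industry level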
***************************************************   

*** MERGE DATASETS ***
* Every merge below is many-to-one onto the firm panel and discards _merge,
* so unmatched rows from either side stay in memory.

* Base panel: Compustat
use 2.intermediate\data_firm, clear

* Keyed on year only: FRED, then BDS entry
merge m:1 year using 2.intermediate\fred_mapped, nogenerate
merge m:1 year using 2.intermediate\entry_out, nogenerate

* Keyed on year and ind_short: BEA
merge m:1 year ind_short using 2.intermediate\BEA_industry, nogenerate

* Keyed on indcode only: occupational licensing
merge m:1 indcode using 2.intermediate\license_out, nogenerate

* Keyed on indcode and year, in this order:
* Census concentration, industry spreads, regulation index, Mod-HHI
foreach src in ///
    2.intermediate\cencon_out ///
    2.intermediate\spread_data ///
    2.intermediate\regindex_out ///
    2.intermediate\mod_HHI_$currseg {
    merge m:1 indcode year using `src', nogenerate
}
	
* Give the two Mod-HHI components their working names.
rename herf_mod herf_adj
rename herf_votmod herf_vot

* Apply the same cleaning rule to each component, testing the component's OWN
* missingness. (Previously the herf_vot zero-fill tested herf_adj, which had
* already been filled on those rows, so herf_vot was never set to zero.)
*   - no industry code        -> missing
*   - coded industry, 1980+   -> 0 when missing
foreach v of varlist herf_adj herf_vot {
	replace `v' = . if indcode == ""
	replace `v' = 0 if indcode ~= "" & year >= 1980 & `v' == . // industries with no common ownership show up as . even though it should be zero
}

* Modified Herfindahls: herf_s plus each adjustment term.
g mherf = herf_s + herf_adj
g mherf_vot = herf_s + herf_vot

* Put the identifiers first and write the firm-level file for this segment.
order gvkey year indcode 
save 3.Final_data\main_dataset_firm_$currseg, replace



***


*************************************************** 
************ CONSTRUCT INDUSTRY DATASET ***********
*************************************************** 

*** Build the industry-year panel from the firm-level file
use 3.Final_data\main_dataset_firm_$currseg, clear
keep if year != 2016

* Number of non-missing gvkey values per industry-year.
* NOTE(review): rows with a missing gvkey get a missing a1_count, and Stata
* treats missing as larger than any number, so the later "a1_count > 5"
* filters evaluate to true on such rows - confirm this is intended.
egen a1_count = count(gvkey) if gvkey != ., by(indcode year)

* Keep a single row per industry-year and drop the firm identifier.
bysort indcode year: keep if _n == 1
drop gvkey

* Numeric panel id for indcode, then declare the panel so L./S. operators work.
egen indgroup = group(indcode)
sort indgroup year
xtset indgroup year

* COMPUTE INDUSTRY-LEVEL QUANTITIES 
* Every variable in this section is filled only where a1_count > 5.

*** Contemporaneous ratios, by industry
generate a1_q = a1_mv/a1_at if a1_count > 5
generate a1_qadj = a1_mvadj/a1_atadj if a1_count > 5
generate a1_ca = a1_che/a1_at if a1_count > 5
generate a1_blev = a1_bliab/a1_at if a1_count > 5
generate a1_paya = a1_pay/a1_at if a1_count > 5
generate a1_payos = a1_pay/a1_os_cp if a1_count > 5
generate a1_bba = a1_bb/a1_at if a1_count > 5
generate a1_bbos = a1_bb/a1_os_cp if a1_count > 5
generate a1_xrdat = a1_xrd/a1_at if a1_count > 5
generate a1_gwa = a1_gdwl/a1_at if a1_count > 5
generate a1_intanat = a1_intan/a1_at if a1_count > 5
generate a1_intanexgwat = (a1_intan - a1_gdwl)/a1_at if a1_count > 5
generate a1_nblev = (a1_bliab - a1_che)/a1_at if a1_count > 5
generate a1_txdba = a1_txdb/a1_at if a1_count > 5

* Cash flow over the previous year's assets and capital definitions 1-2
generate a1_cfat = a1_cf/L.a1_at if a1_count > 5
forvalues k = 1/2 {
    generate a1_cfk`k' = a1_cf/L.a1_kdef`k' if a1_count > 5
}

* Gross investment over lagged capital (definitions 1, 3, 4 and 5 only)
foreach k in 1 3 4 5 {
    generate a1_ik`k' = a1_inv`k'/L.a1_kdef`k' if a1_count > 5
}

* Investment net of the matching a1_dp series, over lagged capital (definitions 1-6)
forvalues k = 1/6 {
    generate a1_nik`k' = (a1_inv`k' - a1_dp`k')/L.a1_kdef`k' if a1_count > 5
}

* a1_os_cp over same-year (not lagged) a1_kdef1
generate a1_osk_cp = a1_os_cp/a1_kdef1 if a1_count > 5

* Entry
* Natural logs of a1_emp, a1_q and a1_ppe
foreach x in emp q ppe {
    generate a1_log`x' = ln(a1_`x')
}

* Use of proceeds aggregate quantities, by industry
* (each flow is divided by the previous year's a1_at)
generate a1_defat = a1_findef/L.a1_at
generate a1_diat = a1_ndebtiss/L.a1_at
generate a1_eiat = a1_neqiss/L.a1_at
generate a1_divat = a1_dv/L.a1_at
generate a1_invdefat = a1_inv_def/L.a1_at
generate a1_dwcat = a1_dnwc_def/L.a1_at

* Cash flow decomposition: a1_<item>at = a1_<item> / lagged a1_at
foreach item in ibc xidoc dpc txdc cfother {
    generate a1_`item'at = a1_`item'/L.a1_at
}

* Issuance decomposition
* Debt and equity issuance as shares of the financing deficit, computed from
* the levels, from the asset-scaled ratios, and from the a1m_ series.
generate a1_dfpct = a1_ndebtiss/a1_findef
generate a1_efpct = a1_neqiss/a1_findef
generate a1_dfpct2 = a1_diat/a1_defat
generate a1_efpct2 = a1_eiat/a1_defat
generate a1m_dfpct2 = a1m_diat/a1m_defat
generate a1m_efpct2 = a1m_eiat/a1m_defat

* Debt holdings, each relative to a1_ltd
generate a1_dd1d = a1_dd1/a1_ltd
generate a1_dd3d = a1_dd3c/a1_ltd
generate a1_dd5d = a1_dd5c/a1_ltd

* Additional test metrics
* The three *_rz ratios share the a1_capx denominator.
generate a1_extfindep_rz = (a1_capx - a1_cf_rz)/a1_capx
generate a1_exteqfindep_rz = a1_neqiss/a1_capx
generate a1_extdebtfindep_rz = a1_ndebtiss/a1_capx
generate a1_pifo_sh = a1_pifo/a1_pi

* Competition and governance percentiles
* S3. is the three-period seasonal difference: a1_logN minus its value three years earlier.
generate a1_s3logN = S3.a1_logN

* Within each year, bucket industries using cut points at the 33.33rd and 66.66th percentiles.
* NOTE(review): xtile() is not a built-in egen function; presumably supplied by egenmore - confirm it is installed.
egen a1_comppct = xtile(a1_cpcon8_sale), p(33.33 66.66) by(year)
egen a1_entrypct = xtile(a1_s3logN), p(33.33 66.66) by(year)
egen a1_govpct = xtile(a1m_owntotQIX), p(33.33 66.66) by(year)

*** Remove outliers 
* Winsorize each ratio in place at the 1st and 99th percentiles, year by year.
* winsor2 is a user-written command.
foreach v in ///
    a1_paya a1_bba a1_intanat a1_xrdat a1_bbos a1_payos ///
    a1_defat a1_diat a1_eiat a1_dfpct a1_efpct ///
    a1_extfindep_rz a1_exteqfindep_rz a1_extdebtfindep_rz a1_pifo_sh {
    winsor2 `v', replace cuts(1 99) by(year)
}

*** Add labels

* Valuation, leverage and balance-sheet ratios
label variable a1_q "Tobin's Q (CS)"
label variable a1_blev "Aggregate blev"
label variable a1_gwa "Goodwill/Assets"
label variable a1_xrdat "R&D expense/Assets"

* Payouts and buybacks
label variable a1_paya "Payout/assets"
label variable a1_payos "Payout/operating surplus"
label variable a1_bba "Buyback/assets"
label variable a1_bbos "Buyback/operating surplus"

* Cash flow over capital
label variable a1_cfk1 "Cash flow/PP&E (CS)"
label variable a1_cfk2 "Cash flow/fixed assets (CS)"

* Financing deficit and issuance: ratio series first, then the a1m_ means
label variable a1_defat "Financing deficit - by industry"
label variable a1_diat "Net debt issuance - by industry"
label variable a1_eiat "Net equity issuance - by industry"
label variable a1m_defat "Mean financing deficit - by industry"
label variable a1m_diat "Mean net debt issuance - by industry"
label variable a1m_eiat "Mean net equity issuance - by industry"

* External finance dependence and foreign income share
label variable a1_extfindep_rz "External finance dependence"
label variable a1_exteqfindep_rz "External equity issuance dependence"
label variable a1_extdebtfindep_rz "External debt issuance dependence"
label variable a1_pifo_sh "Share of foreign pretax income"

* COMPUTE AGGREGATE QUANTITIES 

* Compute aggregate investment ratios, across industries.
* These a_ ratios are built from a_ inputs only and carry no industry-size
* filter, so they are filled on every row where the inputs are available.
g a_q = a_mv/a_at
g a_qadj = a_mvadj/a_atadj
g a_blev = a_bliab/a_at
g a_paya  = a_pay/a_at
g a_bba  = a_bb/a_at
g a_intanat  = a_intan/a_at
* Previously carried "if a1_count > 5", an industry-level filter that blanked
* this aggregate on small-industry rows; dropped to match the ratios above.
g a_intanexgwat  = (a_intan-a_gdwl)/a_at

* Investment measures
* Gross investment over lagged capital, definitions 1-2
forvalues k = 1/2 {
    generate a_ik`k' = a_inv`k'/L.a_kdef`k'
}
* Net investment (a_inv less a_dp) over lagged assets and over lagged capital
generate a_niat1 = (a_inv1 - a_dp1)/L.a_at
forvalues k = 1/2 {
    generate a_nik`k' = (a_inv`k' - a_dp`k')/L.a_kdef`k'
}
* Investment over lagged a_os_cp
forvalues k = 1/2 {
    generate a_ios_cp`k' = a_inv`k'/L.a_os_cp
}
* a_os_cp over lagged capital
forvalues k = 1/2 {
    generate a_osk_cp`k' = a_os_cp/L.a_kdef`k'
}
* Investment over lagged a_mv
forvalues k = 1/2 {
    generate a_iv`k' = a_inv`k'/L.a_mv
}

* Use of proceeds 
* Each flow is divided by the previous year's a_at.
generate a_defat = a_findef/L.a_at
generate a_diat = a_ndebtiss/L.a_at
generate a_eiat = a_neqiss/L.a_at
generate a_divat = a_dv/L.a_at
generate a_invdefat = a_inv_def/L.a_at
generate a_dwcat = a_dnwc_def/L.a_at

* Debt and equity shares of the financing deficit: from levels, from the
* asset-scaled ratios, and from the am_ series
generate a_dfpct = a_ndebtiss/a_findef
generate a_efpct = a_neqiss/a_findef
generate a_dfpct2 = a_diat/a_defat
generate a_efpct2 = a_eiat/a_defat
generate am_dfpct2 = am_diat/am_defat
generate am_efpct2 = am_eiat/am_defat

* External financing = net debt issuance + net equity issuance
generate a_extfin = a_diat + a_eiat
generate am_extfin = am_diat + am_eiat
* Same formula as a_divat above, stored under a second name
generate a_cdat = a_dv/L.a_at

* Herfindahls
* For herf_s and for mherf, compute three by-year summaries across industries:
* the a1_sale-weighted mean, the simple mean, and the median.
* NOTE(review): wtmean() is not a built-in egen function; presumably user-written - confirm it is installed.
local src_herf herf_s
local src_mherf mherf
foreach h in herf mherf {
    egen awtmean_`h' = wtmean(`src_`h''), weight(a1_sale) by(year)
    egen amean_`h' = mean(`src_`h''), by(year)
    egen amed_`h' = median(`src_`h''), by(year)
}

label variable amean_herf "Mean US Herf"

*** Add labels
* Variable labels for the aggregate (a_ / am_) series.

label variable a_q "Aggregate q"
label variable a_blev "Aggregate blev"
label variable a_paya "Aggregate payout/assets (CS)"

* Investment over lagged capital
label variable a_ik1 "Capx/PP&E (CS)"
label variable a_ik2 "SCF LT investment/fixed assets (CS)"
label variable a_nik1 "Net capx/PP&E (CS)"
label variable a_nik2 "Net SCF LT investment/fixed assets (CS)"
* a_ios_cp1/2 divide investment by lagged a_os_cp (operating surplus), not by
* capital; they previously reused the a_ik1/a_ik2 labels by mistake.
label variable a_ios_cp1 "Capx/operating surplus (CS)"
label variable a_ios_cp2 "SCF LT investment/operating surplus (CS)"
label variable a_kdef1 "PP&E (CS)"
label variable a_kdef2 "Fixed assets (CS)"

* Ownership
label variable a_owninsTRA "Aggregate TRA ownership"
label variable a_owninsDED "Aggregate D ownership"
label variable a_owninsQIX "Aggregate QIX ownership"

* Financing deficit and issuance
label variable am_defat "Mean financing deficit"
label variable am_diat "Mean net debt issuance"
label variable am_eiat "Mean net equity issuance"
label variable a_defat "Financing deficit"
label variable a_diat "Net debt issuance"
label variable a_eiat "Net equity issuance"

* Uses of funds
label variable am_cdat "Mean dividends/assets"
label variable am_invdefat "Mean investments/assets"
label variable am_dwcat "Mean dWC/assets"
label variable a_divat "Dividends/assets"
label variable a_invdefat "Investments/assets"
label variable a_dwcat "dWC/assets"

* COMPUTE SIC-BASED QUANTITIES 
generate a1sic_q = a1sic_mv/a1sic_at

** COMPUTE COVERAGE METRICS

* Net investment (the *_nik_all series) over lagged market value;
* only the industry version is restricted to a1_count > 5
generate a1_niv = a1_nik_all/L.a1_mv if a1_count > 5
generate a_niv1 = a_nik_all/L.a_mv

* Aggregate payout and buybacks over a_os_bea
generate a_payos = a_pay/a_os_bea
generate a_bbos = a_bb/a_os_bea

label variable a_payos "Aggregate payout/operating surplus (CS)"

* Coverage ratios: a1_ppe and a1_inv1 over 1000 times the *_all counterparts
generate a1c_ppek = a1_ppe/(1000*a1_kp_all)
generate a1c_inv = a1_inv1/(1000*a1_ip_all)

* Cap each coverage ratio at 1, leaving missing values missing
foreach c in a1c_ppek a1c_inv {
    replace `c' = 1 if `c' > 1 & `c' != .
}

label variable a1c_inv  "CAPX coverage ratio"
label variable a1c_ppek  "PP&E coverage ratio"

* Per-industry average coverage over years after 2000 (filled only on those rows)
foreach c in a1c_ppek a1c_inv {
    egen avg`c' = mean(`c') if year > 2000, by(indgroup)
}

* keepind marks rows where both averages are non-missing and at least 0.10;
* keepsum totals the flag within each industry
generate keepind = 1 if avga1c_ppek >= 0.10 & avga1c_ppek != . & avga1c_inv >= 0.10 & avga1c_inv != .
egen keepsum = sum(keepind), by(indgroup)

*** FINALIZE AND SAVE ***
*keep if a1_count >= 5
* Identifiers first, drop the numeric panel id, shrink storage types, write the industry file.
order indcode year
drop indgroup
compress
save 3.Final_data\main_dataset_ind_$currseg, replace

* Add industry-level fields to firm dataset to ensure proper data population 
* -update- lets values from the industry file fill in missing values already in
* the firm file; _merge is not kept. Rows without a gvkey are then removed.
use 3.Final_data\main_dataset_firm_$currseg, clear
merge m:1 indcode year using 3.Final_data\main_dataset_ind_$currseg, keepusing( a* a1* *herf* ) update nogenerate
keep if gvkey != .
save 3.Final_data\main_dataset_firm_$currseg, replace

* Re-open the industry file and keep only rows that have an ind_short value.
use 3.Final_data\main_dataset_ind_$currseg, clear
keep if ind_short != ""
save 3.Final_data\main_dataset_ind_$currseg, replace

* Tests: manual end to end replication of critical quantities
* Each test variable is the gap between a stored value and a hard-coded
* reference number for one industry (and year), so the summary should show
* values near zero. Nothing is asserted; the output must be read by eye.
* NOTE(review): the file name is fixed to the BEA segment, not $currseg.

use 3.Final_data\main_dataset_firm_BEA, clear
generate test1 = (a1m_licensed - 0.254545455) if ind_short == "Agriculture"
generate test2 = (a1_ik_eq_bea - 0.154812) if ind_short == "Min_Oil_and_gas" & year == 2015
generate test3 = (a1_nik_eq_bea - 0.006974) if ind_short == "Min_Oil_and_gas" & year == 2015
generate test4 = (a1_ik_all - 0.07931128) if ind_short == "Min_Oil_and_gas" & year == 2015
generate test5 = (a1_nik_all - 0.00596529) if ind_short == "Min_Oil_and_gas" & year == 2015
summarize test*
drop test*
* Has no effect unless -pause on- has been issued (the script sets pause off at the top).
pause
